In [1]:
import pandas as pd
# One-time conversion step, kept commented out for provenance: it exports the
# 'Final data' sheet of the Excel workbook to the CSV file that the next cell loads.
# Uncomment to regenerate the CSV.
#
# # Define the path to your Excel file
# excel_file_path = r'..\Documents\All_Data_2023.xlsx'
# # Define the name of the sheet you want to load
# sheet_name_to_load = 'Final data'
# # Define the path for the output CSV file
# csv_output_path = r'..\data\final_data_2023.csv'
# try:
#     # Read the specified sheet from the Excel file into a pandas DataFrame
#     df = pd.read_excel(excel_file_path, sheet_name=sheet_name_to_load)
#     # Save the DataFrame to a CSV file
#     # index=False prevents pandas from writing the DataFrame index as a column
#     df.to_csv(csv_output_path, index=False)
#     print(f"Sheet '{sheet_name_to_load}' from '{excel_file_path}' successfully saved to '{csv_output_path}'")
# except FileNotFoundError:
#     print(f"Error: The file '{excel_file_path}' was not found.")
# except Exception as e:
#     # Catches other potential errors like the sheet name not existing
#     print(f"An error occurred: {e}")
In [2]:
# Load the 2023 dataset from its CSV export.
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike;
# a backslash-separated path is only split into directories on Windows.
# NOTE: the variable keeps the name `csv_output_path` for compatibility with
# other cells, although in this cell it is the *input* file being read.
csv_output_path = '../data/final_data_2023.csv'
df = pd.read_csv(csv_output_path)
# Preview the first rows to confirm the load worked.
df.head()
Out[2]:
| | Entry | Entry_species | Species | Population | Date | ID | Flower_No. | Length_mm | Width_mm | Ratio_len/wid | ... | Flowers_total | Marked | Wilted | Fruits_marked | Fruits_notmarked | Fruits_total | Fruit | seeds | weight | Avg.weight |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 23 | I.atropurpurea | NET | 2023-02-26 | 3 | 1 | 63.4 | 55.0 | 1.152727 | ... | 3.0 | 1.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 1 | 2 | 24 | I.atropurpurea | NET | 2023-02-26 | 10 | 1 | 96.0 | 78.5 | 1.222930 | ... | 3.0 | 2.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 14.0 | 1008.5 | 72.035714 |
| 2 | 3 | 36 | I.atropurpurea | NET | 2023-03-05 | 10 | 2 | 77.1 | 63.4 | 1.216088 | ... | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.000000 |
| 3 | 4 | 46 | I.atropurpurea | NET | 2023-03-05 | 29 | 1 | 61.0 | 57.2 | 1.066434 | ... | 2.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
| 4 | 5 | 35 | I.atropurpurea | NET | 2023-02-26 | 34 | 1 | 62.5 | 59.7 | 1.046901 | ... | 2.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 |
5 rows × 27 columns
In [4]:
# ProfileReport is the entry point for building the profiling report
from ydata_profiling import ProfileReport

# Build the report for the loaded DataFrame.
# The title is optional; it labels the rendered report.
profile = ProfileReport(
    df,
    title="Iris Data Profiling Report",
)

# Render the report inline in the notebook output
# (suited to Jupyter environments).
profile.to_notebook_iframe()
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
100%|██████████| 27/27 [00:00<00:00, 774.74it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]